using CSV, DataFrames, MAT, LinearAlgebra,  JLD2, HDF5, Random, StatsFuns, DataStructures, Distributions, StatsPlots
include("setup.jl")
include("getBounds.jl")
# include("mcmcfunctions.jl")
# include("counterfactualFunctionsNew.jl")
# include("modelfit.jl")
include("tableHelperFunctions.jl")

const datapath = "G:/.shortcut-targets-by-id/19Q7cGujagvIhQ_OhZwxSp2KsSfSxPYpS/4_clean" #"G:/My Drive/4_clean"
const figurepath = "C:/Users/akapor/Documents/GitHub/Platform-Externalities/5_paper/Round 2/Paper/figures/mcmc"
const tablepath = "C:/Users/akapor/Documents/GitHub/Platform-Externalities/5_paper/Round 2/Paper/tables"
const outputpath = datapath*"/from_Julia/chain2"

save_lv = 2500:200:7500
bws = [60000, 10000, 5000, 2500, 1000, 0]
resimulate = false #set true if you want to rebuild all this stuff! 

function computeAppStats(MCMC,bw,apps=MCMC["Apps"])
    year = Int(MCMC["year"])
    mysample = 1:Int(MCMC["I"])
    AppsRelevant,AppsAll,clearlyInfeasible,clearlyFeasible = restrictToRelevantSet(MCMC,mysample,bw,bw,apps)
    eligible = BitArray{2}(MCMC["EligibleToApply"])
    platform = BitArray{1}(vec(MCMC["platform"]))
    #construct indicators for application to UC or PUC program
    #_UC_PUC = Set( Int.( MCMC["TypeIndex"][:,4][(MCMC["TypeIndex"][:,3] .== 10) .| (MCMC["TypeIndex"][:,3] .== 11)]))
    _isUCPUC = (MCMC["TypeIndex"][:,3] .== 10) .| (MCMC["TypeIndex"][:,3] .== 11)
    isUCPUC = findall(_isUCPUC)
    _UC_PUC = Set(isUCPUC)
    ar = zeros(Int,size(AppsRelevant))
    top4binds = fill(false,length(mysample))
    atleast4apps = fill(false,length(mysample))
    Threads.@threads for ii=1:size(ar,1)
        for jj=1:size(ar,2)
            AppsRelevant[ii,jj]==0 && break
            ar[ii,jj] = (AppsRelevant[ii,jj] in _UC_PUC)
            jj >= 4 && (atleast4apps[ii] = true)
            jj > 4 && _isUCPUC[AppsRelevant[ii,jj]] && (top4binds[ii] = true)
        end
    end
    Napps_uc_puc = sum(ar,dims=2)
    clearlyFeasible = clearlyFeasible .& eligible .& platform'
    clearlyInfeasible = platform' .& (clearlyInfeasible .|  .!(eligible))
    #clearlyFeasible[clearlyInfeasible] .= false
    score = MCMC["Priorities"]
    scorediff = score .- MCMC["cutoff"]' #nonnegative if would be admitted
    maxlength = year==2012 ? 10 : 8
    #
    applengths = sum(AppsRelevant .> 0, dims=1) ./ size(AppsRelevant,1)
    marginal_because_waitlist = platform' .& (scorediff .< -bw) .& ( (score .- MCMC["wlcutoff"]') .>= 0)
    shareClearlyInfeasible = mean(clearlyInfeasible)./mean(platform)
    shareClearlyFeasible = mean(clearlyFeasible)./mean(platform)
    shareMarginalWaitlist = mean(marginal_because_waitlist)./mean(platform)
    _llbinds = [(a > 0) && (!clearlyFeasible[ii,a]) for (ii,a) in enumerate(AppsRelevant[:,maxlength])]
    listLengthBinds = mean(_llbinds)
    _unstable = [a > 0 && (scorediff[ii,a] < 0) for (ii,a) in enumerate(AppsRelevant[:,maxlength])]
    unstable = sum(_unstable .& (MCMC["placementIndex"] .== 0))
    mydict = OrderedDict(:year=>year, :bw=>bw/100, :pctTop4binds=>100*mean(top4binds),
        :pctAtLeast4apps=>100*mean(atleast4apps),
        :pctClearlyFeasible=>100*shareClearlyFeasible,
        :pctMarginalBecauseWaitlist=>100*shareMarginalWaitlist,
        :pctClearlyInfeasible=>100*shareClearlyInfeasible,
        :pctListLengthBinds=>100 * listLengthBinds,:pctUnstable=>100 * unstable/Int(MCMC["I"]),
        :unstableCount=>unstable)
    for ll=1:10
        mydict[Symbol("pctListAtLeast$ll")] = 100*applengths[ll]
        mydict[Symbol("ApplyUCPUCAtLeast$ll")] = sum(Napps_uc_puc .>= ll)
    end
    NamedTuple(k=>mydict[k] for k in keys(mydict))
end

if resimulate
    df_simulated = DataFrame()
    df = DataFrame()
    for year in [2010,2011,2012]
        println("loading data: $year")
        f = matopen(datapath*"/export_to_julia_$(year).mat")
            MCMC = MAT.read(f,"MCMC$(year)")
        close(f)
        for bw in bws
            println("getting stats: year=$year, bw=$bw")
            myrow = computeAppStats(MCMC,bw)
            push!(df,myrow)
        end
        for (r,t) in enumerate(save_lv)
            simulatedApps = Array{Int,2}(CSV.read(outputpath*"/simulatedROL$(year)_$t.csv",DataFrame)) #(I,J): (i,i) cell is rank of j in i's app (0 if not included)
            AppsAll = zeros(Int,size(simulatedApps)) #want (i,r) cell to be program that i ranked rth according to true preferences
            Threads.@threads for ii=1:size(simulatedApps,1)
                app_i = view(simulatedApps,ii,:)
                nApps_i = sum(app_i .!= 0)
                AppsAll[ii,1:nApps_i] .= sortperm(app_i)[length(app_i)+1-nApps_i:end]
            end
            for bw in bws
                println("getting stats: year=$year, draw=$r, bw=$bw")
                myrow = (draw=r, computeAppStats(MCMC,bw,simulatedApps)...)
                push!(df_simulated,myrow)
            end
        end
    end
    CSV.write(outputpath*"/bandwidthSummaryStats.csv",df)
    CSV.write(outputpath*"/bandwidthSummaryStatsSimulated.csv",df_simulated)
else
    df = CSV.read(outputpath*"/bandwidthSummaryStats.csv",DataFrame)
    df_simulated = CSV.read(outputpath*"/bandwidthSummaryStatsSimulated.csv",DataFrame)
end
#df_export = df[:,not([:pctClearlyFeasible,:pctClearlyInfeasible,:pctListLengthBinds])]


using StatsPlots
plt12 = plot()
for row in eachrow(df[df.year .== 2012, :])
    plot!(plt12,1:10,[row[Symbol("pctListAtLeast$t")] for t=1:10],label="bw=$(Int(row.bw))")
end
xlabel!(plt12,"% of students with relevant app length >= n")

plt11 = plot()
for row in eachrow(df[df.year .== 2011, :])
    plot!(plt11,1:8,[row[Symbol("pctListAtLeast$t")] for t=1:8],label="bw=$(Int(row.bw))")
end
xlabel!(plt11,"% of students with relevant app length >= n")

plt10 = plot()
for row in eachrow(df[df.year .== 2010, :])
    plot!(plt10,1:8,[row[Symbol("pctListAtLeast$t")] for t=1:8],label="bw=$(Int(row.bw))")
end
xlabel!(plt10,"% of students with relevant app length >= n")


savefig(plt12,figurepath*"/relevantAppLength2012.png")
savefig(plt11,figurepath*"/relevantAppLength2011.png")
savefig(plt10,figurepath*"/relevantAppLength2010.png")



for year in 2010:2012
    series1 = df[df.year.==year,:pctClearlyFeasible]
    series2 = df[df.year.==year,:pctClearlyInfeasible]
    series3 = 100 .- (series1 .+ series2)
    #series3[series3 .< 0] .= 0
    ticklabel = string.(Int.(bws./100))
    myfig = groupedbar([series1 series3 series2], bar_position=:stack, bar_width=0.7, xticks=(1:6,ticklabel),
        label=["Clearly Feas." "Marginal" "Clearly Infeas."], legend=:bottomright, xlabel="Bandwidth", title="Pct. of student/program pairs, $year")
    savefig(myfig,figurepath*"/relevantProgramCategory$year.png")
end

function makeBwTable(f,df)
    println(f,"\\begin{tabular}{lcccc}")
    println(f,"\\hline\\hline")
    println(f,"Year & Bandwidth & \\% List Length Binds & \\% Possibly Unstable & N Possibly Unstable  \\\\ \\hline")
    #
    for year in 2010:2012
        println(f,"& & & & \\\\")
        for row in eachrow(df[df.year.==year,:])
            println(f, "$(row.year) & $(Int(row.bw)) & $(round(row.pctListLengthBinds,digits=2)) & $(round(row.pctUnstable,digits=2)) & $(row.unstableCount) \\\\")
        end
    end
    println(f," & & & & \\\\ \\hline \\hline")
    println(f,"\\end{tabular}")
end

open(tablepath*"/bandwidthRobustness.tex","w") do f
    makeBwTable(f,df)
end


function makeUCPUCTable(f,df)
    println(f,"\\begin{tabular}{lccccc}")
    println(f,"\\hline\\hline")
    println(f,"Year & Bandwidth & \\% Applying Any UC/PUC & \\% Applying At Least 2 & \\% 3 or More & \\% 4 or more \\\\ \\hline")
    for year in 2010:2012
        println(f,"& & & & & \\\\")
        dfy = df[df.year.==year,:]
        n = dfy[1,:unstableCount]/(dfy[1,:pctUnstable]/100)
        for row in eachrow(dfy)
            println(f, "$(row.year) & $(Int(row.bw)) & $(round(100*row.ApplyUCPUCAtLeast1/n,digits=2)) & $(round(100*row.ApplyUCPUCAtLeast2/n,digits=2)) & $(round(100*row.ApplyUCPUCAtLeast3/n,digits=2)) & $(round(100*row.ApplyUCPUCAtLeast4/n,digits=2)) \\\\")
        end
    end
    println(f," & & & & & \\\\ \\hline \\hline")
    println(f,"\\end{tabular}")
end


open(tablepath*"/bandwidthUCPUC.tex","w") do f
    makeUCPUCTable(f,df)
end


for year in [2010,2011,2012]
    n = df[df.year .== year, :unstableCount][1] ./ df[df.year .== year, :pctUnstable][1] #n/100
    pltUCPUC = plot()
    for row in eachrow(df[df.year .== year, :])
        plot!(pltUCPUC,1:8, [row[Symbol("ApplyUCPUCAtLeast$t")]./n for t=1:8],label="bw=$(Int(row.bw))")
    end
    xlabel!(pltUCPUC,"% of students with at least n relevant UC / PUC apps, $year")
    savefig(pltUCPUC,figurepath*"/relevantAppsUCPUC$year.png")
end


df_sim_combined = combine( groupby(df_simulated,[:year,:bw]), :pctTop4binds => mean, :pctListAtLeast4 => mean,
[Symbol("ApplyUCPUCAtLeast$t")=>(x-> 100 * mean(x))=>Symbol("ApplyUCPUCAtLeast$t") for t=1:4]... )